Load in necessary packages

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(sf)
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(mapview)
library(tigris)
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
## 
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
## 
##     plot
library(censusapi)
## 
## Attaching package: 'censusapi'
## The following object is masked from 'package:methods':
## 
##     getFunction
library(leaflet)
library(lehdr)
library(fs)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Methodology

Essential Worker Designation

The data used to assess the % of workers deemed essential was determined using the Labor Market Information (LMI) Institute’s Standard Occupation Classification (SOC) codes connectd to essential businesses. (https://www.lmiontheweb.org/more-than-half-of-u-s-workers-in-critical-occupations-in-the-fight-against-covid-19/). The SOC codes the LMI institute provided are based on the Cybersecurity and Infrastructure Security Agency’s list from April 17, 2020 (https://www.cisa.gov/sites/default/files/publications/Version_3.0_CISA_Guidance_on_Essential_Critical_Infrastructure_Workers_4.pdf). These codes have the advantage of being directly linked to census respondees so that each respondee has one SOC code.

Load in PUMAs for Bay Area and Visualize

Load Bay Area PUMA-level data. You can see how PUMA (Public Use Microdata Area) level regions compare to tract/blockgroup level regions here (https://tigerweb.geo.census.gov/tigerweb/) Visualization of the PUMAs

# get Bay Area County PUMAS
bay_area_county_names <-
  c(
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma"
  )

bay_pumas <- pumas("CA", cb=F, progress_bar=F) %>%
  mutate(county = substr(GEOID10, start = 1, stop =5)) %>% 
  filter(county %in% c("06085","06081","06075","06041","06097","06055","06095","06013","06001")) 

bay_pumas_codes <-
  bay_pumas %>% pull(PUMACE10)

projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"

bay_area_county_names <-
  c("Alameda","Contra Costa","Marin","Napa","San Francisco","San Mateo","Santa Clara","Solano","Sonoma")

#Water to clean up boundaries of PUMAs
water <- 
  bay_area_county_names %>% 
  map_dfr(function(county){ # this is the tidyverse version of a for loop. You can practice writing this the normal for loop way.
    area_water("CA", county, progress_bar = FALSE) %>% as.data.frame()
  }) %>% 
  st_as_sf() %>% 
  st_set_crs(st_crs(counties("CA", cb=F, progress_bar = FALSE)
  )) %>% 
  st_union() %>% 
  as_Spatial() %>% 
  sp::disaggregate() %>% 
  st_as_sf() %>% 
  st_set_crs(st_crs(counties("CA", cb=F, progress_bar = FALSE)
  )) %>% 
  mutate(area = st_area(.) %>% as.numeric()) %>% 
  filter(area == max(area)) %>% 
  dplyr::select(-area)
## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes

## Warning in bind_rows_(x, .id): Vectorizing 'sfc_POLYGON' elements may not
## preserve their attributes
bay_pumas_fixed <-
  bay_pumas %>% 
  st_difference(water)
## although coordinates are longitude/latitude, st_difference assumes that they are planar
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
mapview(bay_pumas_fixed)
print(str_c("There are ",dim(bay_pumas_fixed)[[1]], " PUMAs in the Bay Area")) #Nrows
## [1] "There are 55 PUMAs in the Bay Area"

Load in SOC Essential Code Dataset

data_dir <- "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/"
raw_soc <- readxl::read_xlsx(str_c(data_dir,"SOC-Codes-CISA-Critical-Infrastructure-Workers-with-OES-Data.xlsx"))
soc <- raw_soc %>% 
  mutate(Critical = replace_na(Critical, FALSE)) %>% 
  mutate(Critical = replace(Critical, Critical == "X",TRUE)) %>% 
  mutate(`SOC Occupation` = str_remove(`SOC Occupation`,"-")) %>%   #Places SOC code in the format used in PUMS data
  mutate(Critical = as.logical(Critical)) %>% 
  select(`SOC Occupation`,`Occupation Description`,Critical)

#Visualize Breakdown
soc %>% 
  group_by(Critical) %>% 
  summarize(SOCs = n()) %>% 
  mutate(Critical = replace(Critical, Critical == FALSE ,"Nonessential")) %>% 
  mutate(Critical = replace(Critical, Critical == TRUE ,"Essential")) %>% 
  ggplot(aes(Critical,SOCs)) + geom_col()

Get SOC census PUMA data

# puma_soc <- read_csv("/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/PUMS/csv_pca/psam_p06.csv")
# colnames(puma_soc)
# bay_puma_soc <- puma_soc %>% 
#   filter(PUMA %in% bay_pumas_codes) #Filters for just 9 Bay Area Counties
# saveRDS(bay_puma_soc, file = "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/bay_puma_soc.rds")
bay_puma_soc <- readRDS(file = "/Users/spencerrobinson/Documents/covid19/Spencer_Robinson/bay_puma_soc.rds") #PUMA = Public Use Microdata Area

Visualize SOC Data for Bay Area

soc_essential <- bay_puma_soc %>%
  select("PUMA","SOCP") %>% 
  left_join(soc, by = c("SOCP" = "SOC Occupation")) %>% 
  mutate(`Occupation Description` = ifelse(is.na(SOCP), replace_na(`Occupation Description`, "Not In Labor Force"),`Occupation Description`)) #Remove those not in the labor force

top_10_visualize_with_na <- soc_essential %>% 
  group_by(SOCP, `Occupation Description`) %>% 
  tally() %>% 
  arrange(desc(n)) %>%
  head(n = 10) %>% 
  ggplot(aes(reorder(SOCP,-n), n))+geom_col()+theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
top_10_visualize_with_na

top_10 <- soc_essential %>% 
  filter(!is.na(SOCP)) %>%  #Filter out those not in work force
  group_by(SOCP, `Occupation Description`,Critical) %>% 
  tally() %>% 
  arrange(desc(n)) %>%
  head(n = 10) 

top_10
## # A tibble: 10 x 4
## # Groups:   SOCP, Occupation Description [10]
##    SOCP   `Occupation Description`                                Critical     n
##    <chr>  <chr>                                                   <lgl>    <int>
##  1 1191XX <NA>                                                    NA        7881
##  2 151252 Software Developers                                     TRUE      7864
##  3 412031 Retail Salespersons                                     FALSE     4997
##  4 252020 <NA>                                                    NA        4786
##  5 412010 <NA>                                                    NA        4282
##  6 291141 Registered Nurses                                       TRUE      4078
##  7 132011 Accountants and Auditors                                FALSE     4032
##  8 436014 Secretaries and Administrative Assistants, Except Lega… FALSE     3859
##  9 434051 Customer Service Representatives                        TRUE      3532
## 10 411011 First-Line Supervisors of Retail Sales Workers          TRUE      3449
top_10_visualize <- top_10 %>%
  filter(!is.na(SOCP)) %>% #Filter out those not in work force
  ggplot(aes(reorder(SOCP,-n), n, fill = Critical ))+geom_col()+labs(x = "SOCP Code")+ theme(axis.text.x = element_text(angle = 45, vjust = 0.5))

top_10_visualize

essential_visualize <- soc_essential %>% 
  filter(!is.na(SOCP)) %>% #Filter out those not in work force
  group_by(Critical) %>% 
  tally() %>% 
  ggplot()+geom_col(aes(x = "", y =  n , fill = Critical), stat = "identity", position = "fill")
## Warning: Ignoring unknown parameters: stat
essential_visualize

Processing SOCs to fill in NAs

#Using fraction Essential instead of TRUE/FALSE
soc_frac <- soc %>% 
  mutate(Critical = as.numeric(Critical)) %>% 
  rename("fracEssential" = Critical)

soc_4_dig <- soc_frac %>% 
  mutate(`SOC Occupation`  = str_c(substr(`SOC Occupation`, 1,4),"XX")) %>% 
  group_by(`SOC Occupation` , fracEssential) %>% 
  summarize(count = n()) %>% 
  spread(fracEssential,count) %>% 
  mutate(`1` = replace_na(`1`,0)) %>% 
  mutate(`0` = replace_na(`0`, 0)) %>% 
  rename("Nonessential" = `0`, "Essential" = `1` ) %>% 
  mutate(fracEssential = Essential/(Essential+Nonessential)) %>% 
  select(`SOC Occupation` ,fracEssential)

soc_5_dig <- soc_frac %>% 
  mutate(`SOC Occupation` = str_c(substr(`SOC Occupation`, 1,5),"X")) %>% 
  group_by(`SOC Occupation` , fracEssential) %>% 
  summarize(count = n()) %>% 
  spread(fracEssential,count) %>% 
  mutate(`1` = replace_na(`1`,0)) %>% 
  mutate(`0` = replace_na(`0`, 0)) %>% 
  rename("Nonessential" = `0`, "Essential" = `1` ) %>% 
  mutate(fracEssential = Essential/(Essential+Nonessential)) %>% 
  select(`SOC Occupation` ,fracEssential)


soc_frac_adj <- soc_frac %>% 
  full_join(soc_4_dig) %>% 
  full_join(soc_5_dig)
## Joining, by = c("SOC Occupation", "fracEssential")
## Joining, by = c("SOC Occupation", "fracEssential")

See improvement

soc_essential_frac <- bay_puma_soc %>%
  select("PUMA","SOCP") %>% 
  left_join(soc_frac_adj, by = c("SOCP" = "SOC Occupation")) %>% 
  mutate(`Occupation Description` = ifelse(is.na(SOCP), replace_na(`Occupation Description`, "Not In Labor Force"),`Occupation Description`)) #Remove those not in the labor force

top_10_visualize_with_na_adj <- soc_essential_frac %>% 
  group_by(SOCP, `Occupation Description`) %>% 
  tally() %>% 
  arrange(desc(n)) %>%
  head(n = 10) %>% 
  ggplot(aes(reorder(SOCP,-n), n))+geom_col()+theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
top_10_visualize_with_na_adj

top_10_visualize_adj <- soc_essential_frac %>% 
  filter(!is.na(SOCP)) %>%  #Filter out those not in work force
  group_by(SOCP, `Occupation Description`, fracEssential) %>% 
  tally() %>% 
  arrange(desc(n)) %>%
  head(n = 10)  %>%
  filter(!is.na(SOCP)) %>% #Filter out those not in work force
  ggplot(aes(reorder(SOCP,-n), n, fill = fracEssential ))+geom_col()+labs(x = "SOCP Code")+ theme(axis.text.x = element_text(angle = 45, vjust = 0.5))
top_10_visualize_adj

essential_visualize <- soc_essential_frac %>% 
  filter(!is.na(SOCP)) %>% #Filter out those not in work force
  mutate(fracEssential_present = ifelse(!is.na(fracEssential),TRUE,FALSE)) %>% 
  group_by(fracEssential_present) %>% 
  tally() %>% 
  ggplot()+geom_col(aes(x = "", y =  n , fill = fracEssential_present), stat = "identity", position = "fill")
## Warning: Ignoring unknown parameters: stat
essential_visualize